import pandas as pd
from pandas import DataFrame
from sklearn.cluster import KMeans
from sklearn.metrics import calinski_harabasz_score,silhouette_score
import numpy as np
import matplotlib.pyplot as plt

# Cluster the passenger-volume table with K-Means, print two cluster-quality
# scores (silhouette coefficient and Calinski-Harabasz index), and show a
# scatter plot coloured by cluster assignment.
datafile = './文本/客运量_1-16.csv'
data = pd.read_csv(datafile)  # read_csv already returns a DataFrame
data = data.drop(columns=['机场名'])  # drop the airport-name column

# NOTE(review): assumes every remaining column is numeric and that there are
# at least two of them (the plot below uses columns 0 and 1) -- confirm
# against the CSV.
train = np.array(data)
i = 5  # number of clusters

# Train the model once and reuse it for both metrics and the plot.
# fit(X).labels_ is what fit_predict(X) returns, so fitting a second time
# with the same parameters and random_state only repeated the same work.
kmeans2 = KMeans(n_clusters=i, random_state=0).fit(train)
kmeans = kmeans2.labels_  # per-sample cluster labels (not the estimator)

score = silhouette_score(train, kmeans)  # silhouette coefficient
print(score)
score2 = calinski_harabasz_score(train, kmeans)  # Calinski-Harabasz index
print(score2)

# Only the first two feature columns are drawn; colour encodes the cluster.
plt.scatter(train[:, 0], train[:, 1], c=kmeans)
plt.title('KMEANS')
plt.show()